The dataset this week comes from Kaggle and contains all Scooby Doo episodes. More info about Scooby Doo can be found on ScoobyPedia.

1 Setup

pacman::p_load(
  tidytuesdayR,
  tidyverse,
  lubridate,
  magrittr,
  glue,
  janitor,
  skimr,
  here
)

2 Raw Data

Get data and write to local file

# Download the TidyTuesday release for 2021, week 29, pull out its
# "scoobydoo" table and cache it as a semicolon-delimited file (data.csv)
# at the path resolved by here().
write_csv2(
  pluck(tt_load(2021, week = 29), "scoobydoo"),
  here("data.csv")
)

Read data from local file (d_raw)

# Re-import the cached file with every column typed as character, so nothing
# is coerced before it has been inspected. The literal strings "NA" and
# "NULL" are both read as missing values.
d_raw <- here("data.csv") %>%
  read_csv2(
    na = c("NA", "NULL"),
    col_types = cols(.default = col_character())
  )

Create working copy (d)

# All wrangling below is applied to d; d_raw keeps the import as read.
d <- d_raw

3 Inspection

# Print the tibble, then list every column with its type and first values.
d
glimpse(d)
## Rows: 603
## Columns: 75
## $ index                    <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", …
## $ series_name              <chr> "Scooby Doo, Where Are You!", "Scooby Doo, Wh…
## $ network                  <chr> "CBS", "CBS", "CBS", "CBS", "CBS", "CBS", "CB…
## $ season                   <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ title                    <chr> "What a Night for a Knight", "A Clue for Scoo…
## $ imdb                     <chr> "8.1", "8.1", "8", "7.8", "7.5", "8.4", "7.6"…
## $ engagement               <chr> "556", "479", "455", "426", "391", "384", "35…
## $ date_aired               <chr> "1969-09-13", "1969-09-20", "1969-09-27", "19…
## $ run_time                 <chr> "21", "22", "21", "21", "21", "21", "21", "21…
## $ format                   <chr> "TV Series", "TV Series", "TV Series", "TV Se…
## $ monster_name             <chr> "Black Knight", "Ghost of Cptn. Cuttler", "Ph…
## $ monster_gender           <chr> "Male", "Male", "Male", "Male", "Female", "Ma…
## $ monster_type             <chr> "Possessed Object", "Ghost", "Ghost", "Ancien…
## $ monster_subtype          <chr> "Suit", "Suit", "Phantom", "Miner", "Witch Do…
## $ monster_species          <chr> "Object", "Human", "Human", "Human", "Human",…
## $ monster_real             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ monster_amount           <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ caught_fred              <chr> "FALSE", "FALSE", "FALSE", "TRUE", "FALSE", "…
## $ caught_daphnie           <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_velma             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_shaggy            <chr> "TRUE", "TRUE", "FALSE", "FALSE", "FALSE", "F…
## $ caught_scooby            <chr> "TRUE", "FALSE", "TRUE", "FALSE", "TRUE", "FA…
## $ captured_fred            <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_daphnie         <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_velma           <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_shaggy          <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ captured_scooby          <chr> "FALSE", "FALSE", "FALSE", "FALSE", "TRUE", "…
## $ unmask_fred              <chr> "FALSE", "TRUE", "TRUE", "TRUE", "FALSE", "TR…
## $ unmask_daphnie           <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_velma             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_shaggy            <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_scooby            <chr> "TRUE", "FALSE", "FALSE", "FALSE", "TRUE", "F…
## $ snack_fred               <chr> "TRUE", "FALSE", "TRUE", "FALSE", "FALSE", "T…
## $ snack_daphnie            <chr> "FALSE", "FALSE", "FALSE", "TRUE", "TRUE", "F…
## $ snack_velma              <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ snack_shaggy             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ snack_scooby             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_other             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_other             <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_not               <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ trap_work_first          <chr> NA, "FALSE", "FALSE", "TRUE", NA, "TRUE", "FA…
## $ setting_terrain          <chr> "Urban", "Coast", "Island", "Cave", "Desert",…
## $ setting_country_state    <chr> "United States", "United States", "United Sta…
## $ suspects_amount          <chr> "2", "2", "0", "2", "1", "2", "1", "2", "1", …
## $ non_suspect              <chr> "FALSE", "TRUE", "TRUE", "FALSE", "FALSE", "F…
## $ arrested                 <chr> "TRUE", "TRUE", "TRUE", "TRUE", "TRUE", "TRUE…
## $ culprit_name             <chr> "Mr. Wickles", "Cptn. Cuttler", "Bluestone th…
## $ culprit_gender           <chr> "Male", "Male", "Male", "Male", "Male", "Male…
## $ culprit_amount           <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ motive                   <chr> "Theft", "Theft", "Treasure", "Natural Resour…
## $ if_it_wasnt_for          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "thes…
## $ and_that                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "dog"…
## $ door_gag                 <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ number_of_snacks         <chr> "2", "1", "3", "2", "2", "4", "4", "0", "1", …
## $ split_up                 <chr> "1", "0", "0", "1", "0", "0", "1", "0", "0", …
## $ another_mystery          <chr> "1", "0", "0", "0", "1", "0", "0", "0", "0", …
## $ set_a_trap               <chr> "0", "0", "0", "0", "0", "0", "1", "1", "0", …
## $ jeepers                  <chr> "0", "0", "0", "0", "0", "1", "0", "0", "0", …
## $ jinkies                  <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", …
## $ my_glasses               <chr> "1", "0", "0", "0", "1", "0", "0", "1", "0", …
## $ just_about_wrapped_up    <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", …
## $ zoinks                   <chr> "1", "3", "1", "2", "0", "2", "1", "0", "0", …
## $ groovy                   <chr> "0", "0", "2", "1", "0", "0", "1", "0", "0", …
## $ scooby_doo_where_are_you <chr> "0", "1", "0", "0", "1", "0", "0", "1", "0", …
## $ rooby_rooby_roo          <chr> "1", "0", "0", "0", "0", "1", "1", "1", "1", …
## $ batman                   <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ scooby_dum               <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ scrappy_doo              <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ hex_girls                <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ blue_falcon              <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ fred_va                  <chr> "Frank Welker", "Frank Welker", "Frank Welker…
## $ daphnie_va               <chr> "Stefanianna Christopherson", "Stefanianna Ch…
## $ velma_va                 <chr> "Nicole Jaffe", "Nicole Jaffe", "Nicole Jaffe…
## $ shaggy_va                <chr> "Casey Kasem", "Casey Kasem", "Casey Kasem", …
## $ scooby_va                <chr> "Don Messick", "Don Messick", "Don Messick", …

3.1 Coltypes

Names of columns that appear logical at first glance

# Names of all columns that look logical: ten individually listed columns
# followed by the per-character families that share a common prefix. The
# selection goes through select() so the prefix helpers are resolved
# against the columns actually present in d.
col_lgl <- names(select(
  d,
  all_of(c(
    "arrested", "batman", "blue_falcon", "door_gag", "hex_girls",
    "monster_real", "non_suspect", "scooby_dum", "scrappy_doo",
    "trap_work_first"
  )),
  starts_with("captured_"),
  starts_with("caught_"),
  starts_with("snack_"),
  starts_with("unmask_")
))

Names of columns that appear numeric at first glance

# Candidate numeric columns, listed alphabetically. Whether each one really
# holds only numbers is checked later, before any conversion.
col_num <- c(
  "another_mystery", "culprit_amount", "engagement", "groovy",
  "imdb", "index", "jeepers", "jinkies",
  "just_about_wrapped_up", "monster_amount", "my_glasses",
  "number_of_snacks", "rooby_rooby_roo", "run_time",
  "scooby_doo_where_are_you", "season", "set_a_trap", "split_up",
  "suspects_amount", "zoinks"
)

4 Wrangling

4.1 Types

4.1.1 Date

Parse date from character vector date_aired

# Convert the year-month-day strings in date_aired to Date values, stored in
# the new column aired_date.
d <- mutate(d, aired_date = ymd(date_aired))

# Spot-check 15 random rows: raw string next to the parsed date.
slice_sample(select(d, date_aired, aired_date), n = 15)

# The raw character column is no longer needed once the parse is verified.
d <- select(d, -date_aired)

Extract date elements from aired_date

# Derive the calendar year and the month from aired_date. With label = TRUE
# the month is returned as an ordered factor of abbreviated month names
# (in the session's locale) rather than as a number.
d %<>% mutate(
  aired_year = year(aired_date),
  aired_month = month(aired_date, label = TRUE)
)

# Derive the decade label (e.g. 1969 -> "1960s") and store it as a factor.
# The conversion has to wrap the column expression: piping the result of
# mutate() into as_factor() hands the whole data frame to as_factor() and
# leaves aired_decade as a character column. factor() sorts its levels, so
# the decades are ordered chronologically.
d %<>% mutate(
  aired_decade = factor(
    as.character(glue("{floor(aired_year / 10) * 10}s"))
  )
)

# Spot-check the derived columns on 15 random rows.
d %>%
  select(aired_date, aired_year, aired_month, aired_decade) %>%
  slice_sample(n = 15)

4.1.2 Logical

Show all unique values in the columns that are presumed to be logical to manually verify this assumption.

# List every distinct value found anywhere in the presumed-logical columns.
# all_of() marks col_lgl as an external character vector of column names;
# handing the bare vector to select() is ambiguous (it could also be a
# column called "col_lgl") and is deprecated in tidyselect.
d %>%
  select(all_of(col_lgl)) %>%
  map(~ unique(.x)) %>%
  flatten_chr() %>%
  unique()
## [1] "TRUE"  "FALSE" NA

There are indeed no other values than "TRUE"/"FALSE" or NA.

# Convert the character "TRUE"/"FALSE" columns to logical; NA stays NA.
# all_of() makes explicit that col_lgl is a vector of column names defined
# outside the data frame (bare external vectors are deprecated in
# tidyselect).
d %<>% mutate(across(all_of(col_lgl), as.logical))

# Spot-check the converted columns on 15 random rows.
d %>%
  select(all_of(col_lgl)) %>%
  slice_sample(n = 15)

4.1.3 Numeric

Do the (presumed) numeric columns contain non-numeric characters?

# Distinct values of every presumed-numeric column, as a named list with one
# element per column. all_of() marks col_num as an external vector of column
# names (bare external vectors are deprecated in tidyselect).
uniq <- d %>%
  select(all_of(col_num)) %>%
  map(unique)

# Flag the columns in which at least one value contains a non-digit
# character. NA values are ignored, so a column holding only digits and NA
# is not flagged.
non_num <- map_lgl(uniq, ~ any(str_detect(.x, "\\D"), na.rm = TRUE))

# Show the distinct values of the flagged columns for manual review.
uniq[non_num]
## $imdb
##  [1] "8.1" "8"   "7.8" "7.5" "8.4" "7.6" "8.2" "8.5" "8.7" "8.3" "7.7" "7.9"
## [13] "6.9" "7.4" "7.2" "7.3" "7"   "6.3" "6.8" "7.1" "6.6" "6.4" "6"   "6.7"
## [25] "6.5" "6.2" "6.1" "5.8" "5.1" "5"   "5.9" "5.5" "5.3" "5.2" "5.4" "5.6"
## [37] "4.8" "4.6" "4.9" "8.8" "8.6" "9.2" "9.1" "8.9" "9"   "9.3" "9.6" "4.2"
## [49] "5.7" NA   
## 
## $number_of_snacks
##  [1] "2"                 "1"                 "3"                
##  [4] "4"                 "0"                 "1 box"            
##  [7] "a couple"          "6"                 "several"          
## [10] "3 boxes"           "truck load"        "5"                
## [13] "2 boxes"           "lifetime supply"   NA                 
## [16] "8"                 "10"                "wheel barrel full"
## [19] "several boxes"    
## 
## $season
## [1] "1"         "2"         "Crossover" "3"         "Movie"     "Special"  
## [7] "4"

The only non-numeric character in imdb is the decimal point (.), so this column can safely be converted to numeric. For season and number_of_snacks, on the other hand, a numeric conversion does not make sense: both contain genuinely textual values (e.g. "Movie" or "several boxes").

# Final conversion list: every candidate except the two columns that hold
# free text (season and number_of_snacks).
col_num_final <- setdiff(col_num, c("season", "number_of_snacks"))

Keep the original columns (temporarily) for easy verification.

# Convert the remaining candidates to numeric. The converted copies get a
# "_num" suffix so they can temporarily sit next to the original character
# columns for comparison.
d_num <- d %>%
  select(all_of(col_num_final)) %>%
  mutate(across(everything(), as.numeric)) %>%
  rename_with(~ paste0(.x, "_num"))

# Attach the converted copies to the main data.
d %<>% bind_cols(d_num)

# Sort columns alphabetically, which places each "<name>_num" copy right
# after its original.
d %<>% select(sort(colnames(.)))

# Spot-check: originals and converted copies side by side on 15 random rows.
d %>%
  select(all_of(sort(c(col_num_final, paste0(col_num_final, "_num"))))) %>%
  slice_sample(n = 15)

# Drop the character originals and give the numeric copies the original
# names back. The suffix is stripped only from columns that end in "_num",
# and only at the end of the name, so no other column name can be altered.
d %<>%
  select(-all_of(col_num_final)) %>%
  rename_with(~ str_remove(.x, "_num$"), .cols = ends_with("_num"))

4.2 Nesting

Identify character columns containing ≥1 comma (might indicate nested data)

# Keep the character columns, then show only those in which at least one
# value contains a comma. Missing values are ignored, so a column without
# any comma is excluded even when it contains NA.
d_chr <- select(d, where(is.character))
d_chr %>%
  select(where(~ any(str_detect(.x, ","), na.rm = TRUE)))

Create nested list columns where applicable

# Columns whose values are comma-separated lists packed into one string.
col_nested <- c(
  "culprit_gender",
  "culprit_name",
  "monster_gender",
  "monster_name",
  "monster_species",
  "monster_type",
  "monster_subtype"
)

# Turn each of these columns into a list-column holding one character vector
# per row. str_split() already returns a list with one element per row,
# which is exactly the shape mutate() needs for a list-column.
# The result must not be passed on to str_squish(): that function works on
# character vectors, not lists, so it would coerce the list back to a plain
# character vector instead of cleaning the individual pieces. Whitespace
# around the pieces is left in place here and trimmed after unnesting.
d %<>% mutate(
  across(all_of(col_nested), ~ str_split(.x, ","))
)

4.3 Rename

Rename some columns for consistency and easy sorting

# Voice-actor columns: move the "_va" suffix to the front as a "va_" prefix.
d <- rename_with(
  d,
  function(nm) paste0("va_", str_remove(nm, "_va")),
  ends_with("_va")
)

# Guest-appearance flags: prefix with "appears_".
d <- rename_with(
  d,
  function(nm) paste0("appears_", nm),
  all_of(c("batman", "scooby_dum", "scrappy_doo", "hex_girls", "blue_falcon"))
)

# Catchphrase counts: prefix with "quote_".
d <- rename_with(
  d,
  function(nm) paste0("quote_", nm),
  all_of(c(
    "jeepers", "jinkies", "my_glasses", "just_about_wrapped_up",
    "zoinks", "groovy", "scooby_doo_where_are_you", "rooby_rooby_roo"
  ))
)

# Alphabetical column order, so columns sharing a prefix sit together.
d <- select(d, all_of(sort(names(d))))

4.4 Reshape

Create tidy subsets of the data

4.4.1 Captured

# Long format: one row per episode (index) and character, with the value of
# that character's captured_* column. The character label is the column
# name without its prefix, in title case.
d_captured <- pivot_longer(
  select(d, index, starts_with("captured_")),
  cols = -index,
  names_to = "character",
  values_to = "captured"
) %>%
  mutate(character = str_to_title(str_remove(character, "captured_")))

head(d_captured)

4.4.2 Caught

# Long format: one row per episode (index) and caught_* column, with the
# column suffix (title-cased) as the character label. Besides the named
# characters this includes the "other" and "not" columns.
d_caught <- pivot_longer(
  select(d, index, starts_with("caught_")),
  cols = -index,
  names_to = "character",
  values_to = "caught"
) %>%
  mutate(character = str_to_title(str_remove(character, "caught_")))

head(d_caught)

4.4.3 Unmask

# Long format: one row per episode (index) and unmask_* column, with the
# column suffix (title-cased) as the character label.
d_unmask <- pivot_longer(
  select(d, index, starts_with("unmask_")),
  cols = -index,
  names_to = "character",
  values_to = "unmask"
) %>%
  mutate(character = str_to_title(str_remove(character, "unmask_")))

head(d_unmask)

4.4.4 Snack

# Long format: one row per episode (index) and snack_* column, with the
# column suffix (title-cased) as the character label.
d_snack <- pivot_longer(
  select(d, index, starts_with("snack_")),
  cols = -index,
  names_to = "character",
  values_to = "snack"
) %>%
  mutate(character = str_to_title(str_remove(character, "snack_")))

head(d_snack)

4.4.5 Quote

# Long format: one row per episode (index) and catchphrase, with the count
# in n. The phrase label is the column name without its "quote_" prefix,
# underscores turned into spaces, in sentence case.
d_quote <- pivot_longer(
  select(d, index, starts_with("quote_")),
  cols = -index,
  names_to = "quote",
  values_to = "n"
) %>%
  mutate(
    quote = str_to_sentence(
      str_replace_all(str_remove(quote, "quote_"), "_", " ")
    )
  )

head(d_quote)

4.4.6 Voice Actor

# Long format: one row per episode (index) and character, with the name of
# the voice actor. The character label is the column name without its
# "va_" prefix, in title case.
d_voice <- pivot_longer(
  select(d, index, starts_with("va_")),
  cols = -index,
  names_to = "character",
  values_to = "voice"
) %>%
  mutate(character = str_to_title(str_remove(character, "va_")))

head(d_voice)

4.4.7 Monsters

# One row per episode (index) and monster type.
# The pieces are trimmed BEFORE empty strings are removed: a piece made up
# only of whitespace (e.g. from "Ghost, ") would otherwise pass the filter
# and only then collapse to "". Misspelled or variant labels are folded
# into one canonical label afterwards.
d_monster_type <- d %>%
  select(index, monster_type) %>%
  unnest(cols = c(monster_type)) %>%
  mutate(monster_type = str_trim(monster_type)) %>%
  drop_na(monster_type) %>%
  filter(monster_type != "") %>%
  mutate(monster_type = recode(
    monster_type,
    Disugised = "Disguised",
    Disguise = "Disguised",
    `Possessed Object` = "Possessed"
  ))

d_monster_type %>% head()
# One row per episode (index) and monster subtype. Pieces are trimmed before
# empty strings are removed, so whitespace-only pieces are dropped as well.
d_monster_subtype <- d %>%
  select(index, monster_subtype) %>%
  unnest(cols = c(monster_subtype)) %>%
  mutate(monster_subtype = str_trim(monster_subtype)) %>%
  drop_na(monster_subtype) %>%
  filter(monster_subtype != "")

d_monster_subtype %>% head()
# One row per episode (index) and monster species. Pieces are trimmed before
# empty strings are removed, so whitespace-only pieces are dropped as well.
d_monster_species <- d %>%
  select(index, monster_species) %>%
  unnest(cols = c(monster_species)) %>%
  mutate(monster_species = str_trim(monster_species)) %>%
  drop_na(monster_species) %>%
  filter(monster_species != "")

d_monster_species %>% head()
# One row per episode (index) and monster gender. Pieces are trimmed before
# empty strings are removed, so whitespace-only pieces are dropped as well.
d_monster_gender <- d %>%
  select(index, monster_gender) %>%
  unnest(cols = c(monster_gender)) %>%
  mutate(monster_gender = str_trim(monster_gender)) %>%
  drop_na(monster_gender) %>%
  filter(monster_gender != "")

d_monster_gender %>% head()

4.4.8 Culprits

# One row per episode (index) and culprit gender. Pieces are trimmed before
# empty strings are removed, so whitespace-only pieces are dropped as well.
d_culprit <- d %>%
  select(index, culprit_gender) %>%
  unnest(cols = c(culprit_gender)) %>%
  mutate(culprit_gender = str_trim(culprit_gender)) %>%
  drop_na(culprit_gender) %>%
  filter(culprit_gender != "")

d_culprit %>% head()

5 Clean Data

Main dataset after cleaning and wrangling.

# Print the main dataset in its state after all wrangling steps above.
d

6 Descriptives

6.1 Skim

# Per-column summary (missingness and basic statistics), grouped by type.
skim(d)
Data summary
Name d
Number of rows 603
Number of columns 78
_______________________
Column type frequency:
character 17
Date 1
factor 1
list 7
logical 33
numeric 19
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
aired_decade 0 1.00 5 5 0 7 0
and_that 528 0.12 3 80 0 64 0
format 0 1.00 5 21 0 5 0
if_it_wasnt_for 414 0.31 3 116 0 107 0
motive 67 0.89 4 16 0 27 0
network 0 1.00 3 20 0 11 0
number_of_snacks 1 1.00 1 17 0 18 0
season 0 1.00 1 9 0 7 0
series_name 0 1.00 4 42 0 29 0
setting_country_state 0 1.00 4 16 0 79 0
setting_terrain 0 1.00 3 8 0 15 0
title 0 1.00 4 76 0 602 0
va_daphnie 165 0.73 11 26 0 9 0
va_fred 219 0.64 9 18 0 5 0
va_scooby 28 0.95 10 12 0 5 0
va_shaggy 32 0.95 10 15 0 7 0
va_velma 218 0.64 9 19 0 12 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
aired_date 0 1 1969-09-13 2021-02-25 1988-09-10 448

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
aired_month 0 1 TRUE 12 okt: 158, sep: 105, nov: 96, dec: 56

Variable type: list

skim_variable n_missing complete_rate n_unique min_length max_length
culprit_gender 163 0.73 23 1 11
culprit_name 163 0.73 433 1 11
monster_gender 87 0.86 43 1 19
monster_name 87 0.86 481 1 17
monster_species 87 0.86 194 1 19
monster_subtype 88 0.85 269 1 19
monster_type 87 0.86 132 1 19

Variable type: logical

skim_variable n_missing complete_rate mean count
appears_batman 0 1.00 0.01 FAL: 599, TRU: 4
appears_blue_falcon 0 1.00 0.05 FAL: 570, TRU: 33
appears_hex_girls 0 1.00 0.01 FAL: 597, TRU: 6
appears_scooby_dum 0 1.00 0.03 FAL: 586, TRU: 17
appears_scrappy_doo 0 1.00 0.27 FAL: 438, TRU: 165
arrested 155 0.74 0.85 TRU: 381, FAL: 67
captured_daphnie 165 0.73 0.21 FAL: 347, TRU: 91
captured_fred 219 0.64 0.18 FAL: 313, TRU: 71
captured_scooby 28 0.95 0.14 FAL: 492, TRU: 83
captured_shaggy 32 0.95 0.15 FAL: 486, TRU: 85
captured_velma 218 0.64 0.19 FAL: 311, TRU: 74
caught_daphnie 165 0.73 0.07 FAL: 409, TRU: 29
caught_fred 219 0.64 0.34 FAL: 252, TRU: 132
caught_not 0 1.00 0.05 FAL: 572, TRU: 31
caught_other 0 1.00 0.14 FAL: 519, TRU: 84
caught_scooby 28 0.95 0.28 FAL: 415, TRU: 160
caught_shaggy 32 0.95 0.13 FAL: 494, TRU: 77
caught_velma 218 0.64 0.11 FAL: 344, TRU: 41
door_gag 0 1.00 0.10 FAL: 544, TRU: 59
monster_real 87 0.86 0.22 FAL: 404, TRU: 112
non_suspect 160 0.73 0.10 FAL: 397, TRU: 46
snack_daphnie 165 0.73 0.11 FAL: 389, TRU: 49
snack_fred 219 0.64 0.05 FAL: 366, TRU: 18
snack_scooby 27 0.96 0.02 FAL: 564, TRU: 12
snack_shaggy 31 0.95 0.08 FAL: 529, TRU: 43
snack_velma 218 0.64 0.08 FAL: 356, TRU: 29
trap_work_first 354 0.41 0.50 TRU: 125, FAL: 124
unmask_daphnie 165 0.73 0.08 FAL: 401, TRU: 37
unmask_fred 219 0.64 0.27 FAL: 282, TRU: 102
unmask_other 0 1.00 0.06 FAL: 568, TRU: 35
unmask_scooby 28 0.95 0.04 FAL: 552, TRU: 23
unmask_shaggy 32 0.95 0.02 FAL: 558, TRU: 13
unmask_velma 218 0.64 0.24 FAL: 291, TRU: 94

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
aired_year 0 1.00 1994.05 16.99 1969.0 1979.0 1988.0 2011.5 2021.0 ▇▇▁▅▇
another_mystery 219 0.64 0.18 0.41 0.0 0.0 0.0 0.0 3.0 ▇▂▁▁▁
culprit_amount 0 1.00 1.04 1.07 0.0 0.0 1.0 1.0 11.0 ▇▁▁▁▁
engagement 15 0.98 580.33 4807.92 7.0 27.0 54.5 128.2 100951.0 ▇▁▁▁▁
imdb 15 0.98 7.28 0.73 4.2 6.9 7.3 7.7 9.6 ▁▁▇▆▁
index 0 1.00 302.00 174.22 1.0 151.5 302.0 452.5 603.0 ▇▇▇▇▇
monster_amount 0 1.00 1.75 2.29 0.0 1.0 1.0 2.0 19.0 ▇▁▁▁▁
quote_groovy 32 0.95 0.06 0.62 0.0 0.0 0.0 0.0 14.0 ▇▁▁▁▁
quote_jeepers 165 0.73 0.56 1.30 0.0 0.0 0.0 1.0 10.0 ▇▁▁▁▁
quote_jinkies 218 0.64 1.31 1.92 0.0 0.0 1.0 2.0 13.0 ▇▁▁▁▁
quote_just_about_wrapped_up 218 0.64 0.05 0.22 0.0 0.0 0.0 0.0 1.0 ▇▁▁▁▁
quote_my_glasses 218 0.64 0.12 0.34 0.0 0.0 0.0 0.0 2.0 ▇▁▁▁▁
quote_rooby_rooby_roo 28 0.95 0.72 0.79 0.0 0.0 1.0 1.0 7.0 ▇▁▁▁▁
quote_scooby_doo_where_are_you 32 0.95 0.13 0.42 0.0 0.0 0.0 0.0 4.0 ▇▁▁▁▁
quote_zoinks 32 0.95 2.15 2.73 0.0 0.0 1.0 3.0 26.0 ▇▁▁▁▁
run_time 0 1.00 23.52 17.21 4.0 12.0 22.0 23.0 94.0 ▇▃▁▁▁
set_a_trap 219 0.64 0.12 0.34 0.0 0.0 0.0 0.0 2.0 ▇▁▁▁▁
split_up 219 0.64 0.34 0.55 0.0 0.0 0.0 1.0 2.0 ▇▁▃▁▁
suspects_amount 0 1.00 2.85 2.62 0.0 1.0 3.0 4.0 20.0 ▇▂▁▁▁

6.2 Shows

Number of episodes and movies by decade

# Number of shows per decade and format, one row per decade and one column
# per format. The segmented-series and theatrical-movie variants are folded
# into their base format first.
# values_fill puts 0 wherever a decade has no show of a given format. This
# works for whatever formats are present, instead of relying on a fixed list
# of column names, and count() with both keys returns an ungrouped table.
d %>%
  mutate(format = recode(
    format,
    `TV Series (segmented)` = "TV Series",
    `Movie (Theatrical)` = "Movie"
  )) %>%
  count(Decade = aired_decade, format) %>%
  pivot_wider(
    names_from = format,
    values_from = n,
    values_fill = 0L
  )

6.3 Rating

IMDb rating by decade

# Summary statistics of the imdb rating, computed separately per decade.
# The `variable` column is dropped from the result; presumably it only
# repeats the name "imdb" on every row -- verify against rstatix docs.
d %>% 
  group_by(aired_decade) %>% 
  rstatix::get_summary_stats(imdb) %>% 
  select(-variable)

Highest rated show

# Show(s) whose rating equals the maximum rating; ties are all returned and
# rows without a rating are ignored.
best_rating_rows <- NULL
d %>%
  filter(imdb == max(imdb, na.rm = TRUE)) %>%
  select(date = aired_date, format, title, imdb)

Lowest rated show

# Show(s) whose rating equals the minimum rating; ties are all returned and
# rows without a rating are ignored. The rating itself is included in the
# output, matching the table of the highest rated show.
d %>%
  filter(imdb == min(imdb, na.rm = TRUE)) %>%
  select(aired_date, format, title, imdb) %>%
  rename(date = aired_date)

6.4 Captured

# Per character: share of episodes in which the character was / was not
# captured. Rows with a missing flag are excluded; characters are ordered
# by descending TRUE share before the shares are formatted as percentages.
tabyl(drop_na(d_captured, captured), character, captured) %>%
  adorn_percentages("row") %>%
  arrange(desc(`TRUE`)) %>%
  adorn_pct_formatting() %>%
  select(character, `TRUE`, `FALSE`)

6.5 Caught

# Per character: share of episodes with caught TRUE / FALSE. Rows with a
# missing flag are excluded; characters are ordered by descending TRUE
# share before the shares are formatted as percentages.
tabyl(drop_na(d_caught, caught), character, caught) %>%
  adorn_percentages("row") %>%
  arrange(desc(`TRUE`)) %>%
  adorn_pct_formatting() %>%
  select(character, `TRUE`, `FALSE`)

6.6 Unmask

# Per character: share of episodes with unmask TRUE / FALSE. Rows with a
# missing flag are excluded; characters are ordered by descending TRUE
# share before the shares are formatted as percentages.
tabyl(drop_na(d_unmask, unmask), character, unmask) %>%
  adorn_percentages("row") %>%
  arrange(desc(`TRUE`)) %>%
  adorn_pct_formatting() %>%
  select(character, `TRUE`, `FALSE`)

6.7 Snack

# Per character: share of episodes with snack TRUE / FALSE. Rows with a
# missing flag are excluded; characters are ordered by descending TRUE
# share before the shares are formatted as percentages.
tabyl(drop_na(d_snack, snack), character, snack) %>%
  adorn_percentages("row") %>%
  arrange(desc(`TRUE`)) %>%
  adorn_pct_formatting() %>%
  select(character, `TRUE`, `FALSE`)

6.8 Quote

# Total number of times each catchphrase was counted across all episodes,
# most frequent first. Missing counts are ignored in the sum.
count(d_quote, quote, wt = n, name = "total", sort = TRUE)

6.9 Voice Actor

# Number of episodes per character and voice actor; rows without a voice
# actor are left out. The result stays grouped by character.
d_voice %>%
  filter(!is.na(voice)) %>%
  count(character, voice) %>%
  group_by(character)

6.10 Monsters

6.10.1 Monster gender

# Frequency table of monster gender, most frequent first, with the share
# formatted as a percentage.
tabyl(d_monster_gender, monster_gender) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  rename(gender = monster_gender)

6.10.2 Monster type

Top 10 most frequent monster types

# Frequency table of monster type, restricted to the ten highest counts
# (ties at the cut-off are kept), with the share formatted as a percentage.
tabyl(d_monster_type, monster_type) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(type = monster_type)

6.10.3 Monster subtype

Top 10 most frequent monster subtypes

# Frequency table of monster subtype, restricted to the ten highest counts
# (ties at the cut-off are kept), with the share formatted as a percentage.
tabyl(d_monster_subtype, monster_subtype) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(subtype = monster_subtype)

6.10.4 Monster species

Top 10 most frequent monster species

# Frequency table of monster species, restricted to the ten highest counts
# (ties at the cut-off are kept), with the share formatted as a percentage.
tabyl(d_monster_species, monster_species) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(species = monster_species)

6.11 Culprits

# Frequency table of culprit gender, most frequent first, with the share
# formatted as a percentage.
tabyl(d_culprit, culprit_gender) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  rename(gender = culprit_gender)

6.12 Motives

# Frequency table of motives, restricted to the ten highest counts (ties at
# the cut-off are kept). Episodes without a recorded motive are excluded
# before counting, so the percentages refer to episodes with a motive.
d %>%
  filter(!is.na(motive)) %>%
  tabyl(motive) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting()

7 Visualization

# Share of male vs. female monsters per decade (100 % stacked bars).
# The unnested pieces are trimmed before they are matched against
# "Male"/"Female": a piece that still carries surrounding whitespace
# (e.g. " Male" from "Female, Male") would otherwise fail the exact match
# and silently drop out of the plot. Missing and empty pieces never match
# and are removed by the same filter.
ggplot(
  d %>%
    select(aired_decade, monster_gender) %>%
    unnest(monster_gender) %>%
    mutate(monster_gender = str_trim(monster_gender)) %>%
    filter(monster_gender %in% c("Male", "Female")) %>%
    count(aired_decade, monster_gender),
  aes(
    fill = as.factor(monster_gender),
    y = n,
    x = as.factor(aired_decade)
  )) +
  labs(
    title = "Monsters gender",
    subtitle = "Male and female monsters by decade in Scooby Doo",
    x = "decade aired",
    y = "") +
  # position = "fill" rescales every bar to 100 %, so n is shown as a share
  geom_bar(position = "fill", stat = "identity", linetype = "blank") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = c("Female" = "lightpink", "Male" = "skyblue"),
    name = "Gender") +
  hrbrthemes::theme_ipsum_rc()

# Share of culprits by gender per decade (100 % stacked, horizontal bars).
# The unnested pieces are trimmed BEFORE empty strings are removed, so that
# whitespace-only pieces cannot survive the filter and show up as an
# unlabeled "" category.
ggplot(
  d %>%
    select(aired_decade, culprit_gender) %>%
    unnest(cols = c(culprit_gender)) %>%
    mutate(culprit_gender = str_trim(culprit_gender)) %>%
    drop_na(culprit_gender) %>%
    filter(culprit_gender != "") %>%
    count(aired_decade, culprit_gender),
  aes(
    # both factors are reversed because coord_flip() draws the axis
    # bottom-up
    fill = fct_rev(as.factor(culprit_gender)),
    y = n,
    x = fct_rev(as.factor(aired_decade))
  )) +
  labs(
    title = "Culprit gender",
    subtitle = "Male and female culprits by decade in Scooby Doo",
    x = "Decade aired",
    y = "") +
  ggchicklet::geom_chicklet(
    position = position_fill(),
    radius = grid::unit(5, "pt")) +
  scale_y_continuous(
    labels = scales::percent) +
  scale_fill_manual(
    values = c("Female" = "lightpink", "Male" = "skyblue"),
    name = "Gender") +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()

# Characters ordered by their total number of captures (ascending), used as
# the level order of the character axis below.
sort_order <- d_captured %>%
  filter(captured) %>%
  count(character) %>%
  arrange(n) %>%
  pull(character)

# Number of captures per character, stacked by decade. The decade is looked
# up per episode via an explicit join on index; naming the key avoids an
# implicit natural join on whatever column names the two tables happen to
# share. filter(captured) also drops rows with a missing flag.
ggplot(
    d_captured %>%
      left_join(select(d, index, aired_decade), by = "index") %>%
      filter(captured) %>%
      count(decade = aired_decade, character, name = "captured"),
    aes(
      x = fct_relevel(as.factor(character), sort_order),
      fill = as.factor(decade),
      y = captured)
  ) +
  ggchicklet::geom_chicklet(radius = grid::unit(5, "pt")) +
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  labs(
    x = "Character",
    y = "Cumulative count",
    title = "Captured",
    subtitle = "By character and decade",
    caption = "Created by Philomenix",
    fill = "Decade") +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()

# Characters ordered by their total number of TRUE caught flags (ascending),
# used as the level order of the character axis below. The "Not" and
# "Other" categories are left out.
sort_order <- d_caught %>%
  filter(caught & !(character %in% c("Not", "Other"))) %>%
  count(character) %>%
  arrange(n) %>%
  pull(character)

# Number of TRUE caught flags per character, stacked by decade. The decade
# is looked up per episode via an explicit join on index; naming the key
# avoids an implicit natural join on whatever column names the two tables
# happen to share. filter(caught) also drops rows with a missing flag.
ggplot(
    d_caught %>%
      left_join(select(d, index, aired_decade), by = "index") %>%
      filter(caught, !(character %in% c("Not", "Other"))) %>%
      count(decade = aired_decade, character, name = "caught"),
    aes(
      x = fct_relevel(as.factor(character), sort_order),
      fill = as.factor(decade),
      y = caught)
  ) +
  ggchicklet::geom_chicklet(radius = grid::unit(5, "pt")) +
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  labs(
    x = "Character",
    y = "Cumulative count",
    title = "Caught",
    subtitle = "By character and decade",
    caption = "Created by Philomenix",
    fill = "Decade") +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()